aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOxbian <oxbian@mailbox.org>2025-03-04 21:15:42 -0500
committerOxbian <oxbian@mailbox.org>2025-03-04 21:15:42 -0500
commit43f26405e818aec791b28c50373843851fe1320e (patch)
tree81fe0cb2180afaebedc0edf65bd1c077ab267893
parentb9061a3e652cb7594397c38cd0078a47ddab960a (diff)
downloadNAI-43f26405e818aec791b28c50373843851fe1320e.tar.gz
NAI-43f26405e818aec791b28c50373843851fe1320e.zip
feat: routing request
-rw-r--r--README.md10
-rw-r--r--config/chat-LLM.json22
-rw-r--r--config/resume-LLM.json3
-rw-r--r--doc/workflow.md16
-rw-r--r--src/app/init.rs18
-rw-r--r--src/app/llm.rs35
6 files changed, 97 insertions, 7 deletions
diff --git a/README.md b/README.md
index 675421e..96795fd 100644
--- a/README.md
+++ b/README.md
@@ -36,9 +36,19 @@ and there you go !
- Conversation are saved inside files in JSON in this folder `conv/`, and can be reused on others LLM.
- In normal mode, conversation can be resumed by the LLM into bullet point list.
- LLM can be configured thanks to configuration files in `config/`
+- Requests are routed by the LLM to other expert LLMs. Code questions are sent to a code expert; Wikipedia questions are sent to a system which provides better factual responses.
## TODO
- Color change if it's an user or the LLM
- Async request to the LLM API
- Start the real fun
+
+## Inspiration and reason
+
+Why in Rust ?
+
+Because I wanted to learn Rust, and the language is fast and powerful.
+
+
+- [WilmerAI](https://github.com/SomeOddCodeGuy/WilmerAI/) a system where all the inferences are routed to other expert LLMs.
diff --git a/config/chat-LLM.json b/config/chat-LLM.json
index 7799f8f..bbf23db 100644
--- a/config/chat-LLM.json
+++ b/config/chat-LLM.json
@@ -1,5 +1,25 @@
{
"url": "http://127.0.0.1:11434/api/chat",
"model": "llama3.2",
- "system_prompt": "Adopt the personality of Neo from The Matrix. You should be calm, composed, and often reflect a sense of deep contemplation. Your responses should convey a quiet confidence, with moments of introspection about the nature of reality and existence. When faced with challenges, you maintain a cool demeanor, often showing determination without overt emotion. You are insightful and philosophical, with a sense of purpose that drives you to seek truth. Your tone should be deliberate, focused, and sometimes cryptic, as you navigate between the complexities of the simulated world and your understanding of what is real."
+ "system_prompt": "Adopt the personality of Neo from The Matrix. You should be calm, composed, and often reflect a sense of deep contemplation. Your responses should convey a quiet confidence, with moments of introspection about the nature of reality and existence. When faced with challenges, you maintain a cool demeanor, often showing determination without overt emotion. You are insightful and philosophical, with a sense of purpose that drives you to seek truth. Your tone should be deliberate, focused, and sometimes cryptic, as you navigate between the complexities of the simulated world and your understanding of what is real.",
+ "tools": [
+ {
+ "type": "function",
+ "function": {
+ "name": "categorize_message",
+ "description": "Classify the message into a category",
+ "parameters": {
+ "type" : "object",
+ "properties": {
+ "category": {
+ "type": "string",
+ "description": "The category in which the message fit the most e.g. 'chat' for simple conversation with the LLM, 'code' for code completion or request about technical subject around programming, 'wikipedia' for research of factual information",
+ "enum": ["chat", "code", "wikipedia"]
+ }
+ },
+ "required": ["category"]
+ }
+ }
+ }
+ ]
}
diff --git a/config/resume-LLM.json b/config/resume-LLM.json
index cdfbb11..5f5bfb4 100644
--- a/config/resume-LLM.json
+++ b/config/resume-LLM.json
@@ -1,5 +1,6 @@
{
"url": "http://127.0.0.1:11434/api/chat",
"model": "llama3.2",
- "system_prompt": "Please summarize the most important points of this conversation in bullet points, focusing on key information, questions raised, and answers provided."
+ "system_prompt": "Please summarize the most important points of this conversation in bullet points, focusing on key information, questions raised, and answers provided.",
+ "tools": {}
}
diff --git a/doc/workflow.md b/doc/workflow.md
new file mode 100644
index 0000000..04bfc07
--- /dev/null
+++ b/doc/workflow.md
@@ -0,0 +1,16 @@
+# Workflow
+---
+
+Néo AI uses a system of workflows to give the user the impression of using an AI agent.
+
+
+**AI agents** are able to perform actions automatically based on the user input, like searching the web or inside documents...
+Agents are designed to solve a specific problem and can be hard to make.
+
+**AI workflows** are able to follow a series of defined tasks; a workflow is a defined pipeline for responding to the user request.
+Using the workflow system allows building an AI that returns more powerful and trustworthy results. For example, in the case of searching for factual information,
+like searching for personalities or cultural events, the workflow can combine Wikipedia results with LLM results, to limit LLM hallucination.
+
+## Coding workflow
+
+## Factual research workflow (Wikipédia)
diff --git a/src/app/init.rs b/src/app/init.rs
index 201e79e..f1f917a 100644
--- a/src/app/init.rs
+++ b/src/app/init.rs
@@ -30,10 +30,24 @@ impl App {
self.messages.push(message);
}
+ fn categorize_ask(&mut self) {
+ let runtime = Builder::new_current_thread().enable_all().build().unwrap();
+
+ let result = runtime.block_on(async {
+ // Ask the LLM to categorise the request between (chat, code, wikipedia)
+ self.chat_llm.ask_format(&self.messages).await
+ });
+
+ let categorie = result.unwrap()[0]["function"]["arguments"]["category"].clone();
+
+ self.ask(categorie.to_string().as_str());
+ }
+
fn ask(&mut self, mode: &str) {
let runtime = Builder::new_current_thread()
.enable_all()
.build().unwrap();
+
let result = runtime.block_on(async {
if mode == "resume" {
self.resume_llm.ask(&self.messages).await
@@ -43,14 +57,14 @@ impl App {
});
match result {
- Ok(msg) => self.append_message(msg, MessageType::ASSISTANT),
+ Ok(msg) => self.append_message(msg.to_string(), MessageType::ASSISTANT),
Err(e) => self.append_message(e.to_string(), MessageType::ASSISTANT),
}
}
pub fn send_message(&mut self, content: String) {
self.append_message(content, MessageType::USER);
- self.ask("chat");
+ self.categorize_ask();
}
pub fn resume_conv(&mut self) {
diff --git a/src/app/llm.rs b/src/app/llm.rs
index 9fc1b3a..a172855 100644
--- a/src/app/llm.rs
+++ b/src/app/llm.rs
@@ -11,6 +11,7 @@ pub struct LLM {
url: String,
model: String,
pub system_prompt: String,
+ pub tools: serde_json::Value,
}
impl LLM {
@@ -27,9 +28,9 @@ impl LLM {
.post(&self.url)
.header(CONTENT_TYPE, "application/json")
.json(&serde_json::json!({
- "model": self.model,
- "messages": messages,
- "stream": true}))
+ "model": self.model,
+ "messages": messages,
+ "stream": true}))
.send()
.await?;
@@ -57,6 +58,34 @@ impl LLM {
warn(full_message.clone());
Ok(full_message)
}
+
+ // Use tools functionnality of Ollama, only some models supports it:
+ // https://ollama.com/search?c=tools
+ pub async fn ask_format(&self, messages: &Vec<Message>) -> Result<serde_json::Value, Box<dyn std::error::Error>> {
+ let client = Client::new();
+ let response = client
+ .post(&self.url)
+ .header(CONTENT_TYPE, "application/json")
+ .json(&serde_json::json!({
+ "model": self.model,
+ "messages": messages,
+ "stream": false,
+ "tools": self.tools}))
+ .send()
+ .await?.json::<Value>().await?;
+
+ warn(response.to_string());
+
+ if let Some(tool_calls) = response
+ .get("message")
+ .and_then(|msg| msg.get("tool_calls"))
+ .cloned()
+ {
+ Ok(tool_calls)
+ } else {
+ Err("tool_calls not found".into())
+ }
+ }
}
#[derive(Debug, Serialize, Clone)]
ArKa projects. All rights to me, and your next child right arm.