TinyLlama-API

Sleeping

App Files Files Community

Drag2121 committed on Sep 17, 2024

Commit

0af890e

1 Parent(s): 0ba5adf

langchain Ollama not ChatOllama

Browse files

Files changed (2) hide show

app.py +8 -12
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -2,11 +2,10 @@ import os
 from fastapi import FastAPI, HTTPException
 from fastapi.responses import StreamingResponse
 from pydantic import BaseModel
-from langchain_ollama import ChatOllama
-from langchain.schema import HumanMessage
 import logging
 from functools import lru_cache
 # Set up logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -16,7 +15,7 @@ MODEL_NAME = 'phi3:mini'
 @lru_cache()
 def get_llm():
-    return ChatOllama(model=MODEL_NAME)
 class Question(BaseModel):
     text: str
@@ -30,10 +29,9 @@ async def ask_question(question: Question):
     try:
         logger.info(f"Received question: {question.text}")
         llm = get_llm()
-        messages = [HumanMessage(content=question.text)]
-        response = llm(messages)
         logger.info("Response generated successfully")
-        return {"answer": response.content}
     except Exception as e:
         logger.error(f"Error in /ask endpoint: {str(e)}")
         raise HTTPException(status_code=500, detail=str(e))
@@ -43,14 +41,12 @@ async def ask_question_stream(question: Question):
     try:
         logger.info(f"Received question for streaming: {question.text}")
         llm = get_llm()
-        messages = [HumanMessage(content=question.text)]
         async def generate():
             full_response = ""
-            async for chunk in llm.astream(messages):
-                if chunk.content:
-                    full_response += chunk.content
-                    yield chunk.content
             # Log the full response after streaming is complete
             logger.info(f"Full streamed response: {full_response}")

 from fastapi import FastAPI, HTTPException
 from fastapi.responses import StreamingResponse
 from pydantic import BaseModel
+from langchain_community.llms import Ollama
+from langchain_core.messages import HumanMessage
 import logging
 from functools import lru_cache
 # Set up logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 @lru_cache()
 def get_llm():
+    return Ollama(model=MODEL_NAME)
 class Question(BaseModel):
     text: str
     try:
         logger.info(f"Received question: {question.text}")
         llm = get_llm()
+        response = llm.invoke(question.text)
         logger.info("Response generated successfully")
+        return {"answer": response}
     except Exception as e:
         logger.error(f"Error in /ask endpoint: {str(e)}")
         raise HTTPException(status_code=500, detail=str(e))
     try:
         logger.info(f"Received question for streaming: {question.text}")
         llm = get_llm()
         async def generate():
             full_response = ""
+            async for chunk in llm.astream(question.text):
+                full_response += chunk
+                yield chunk
             # Log the full response after streaming is complete
             logger.info(f"Full streamed response: {full_response}")

requirements.txt CHANGED Viewed

@@ -2,4 +2,5 @@ fastapi
 uvicorn[standard]
 langchain-ollama
 langchain
-pydantic

 uvicorn[standard]
 langchain-ollama
 langchain
+pydantic
+langchain-community